library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(data.table)
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
"%&%" = function(a,b) paste(a,b,sep="")
my.dir = "/home/lauren/files_for_revisions_plosgen/fst_results/"
#my.dir = "~/mount/lauren/files_for_revisions_plosgen/fst_results/"

AFA

afa <- fread(my.dir %&% "fst_table_AFA.txt")
mean_afa <- afa[,list(mean_fstCAUafa=mean(fstCAUafa, na.rm = TRUE),mean_fstAFAhis=mean(fstAFAhis, na.rm = TRUE),
  R2_AFA=mean(R2_AFA, na.rm = TRUE), R2_CAU=mean(R2_CAU, na.rm = TRUE), 
  R2_HIS=mean(R2_HIS, na.rm = TRUE), 
  betaAFA_fstCAUafa=sum(fstCAUafa * abs(betaAFA), na.rm = TRUE)/table(is.na(fstCAUafa * betaAFA))[[1]],
  betaAFA_fstAFAhis=sum(fstAFAhis * abs(betaAFA), na.rm = TRUE)/table(is.na(fstAFAhis * betaAFA))[[1]]),
  by=GENE]

mean_afa_0.2 <- dplyr::filter(mean_afa, R2_AFA > 0.2 | R2_CAU > 0.2)
ggplot(mean_afa_0.2, aes(x=R2_AFA-R2_CAU,y=betaAFA_fstCAUafa)) + geom_point() + stat_density_2d(aes(fill = ..level..), geom = "polygon")

ggplot(mean_afa_0.2, aes(x=R2_AFA-R2_CAU,y=mean_fstCAUafa)) + geom_point() + stat_density_2d(aes(fill = ..level..), geom = "polygon")
## Warning: Removed 6 rows containing non-finite values (stat_density2d).
## Warning: Removed 6 rows containing missing values (geom_point).

mean_afa_0.2 <- dplyr::filter(mean_afa, R2_AFA > 0.2 | R2_HIS > 0.2)
ggplot(mean_afa_0.2, aes(x=R2_AFA-R2_HIS,y=betaAFA_fstAFAhis)) + geom_point() + stat_density_2d(aes(fill = ..level..), geom = "polygon")

ggplot(mean_afa_0.2, aes(x=R2_AFA-R2_HIS,y=mean_fstAFAhis)) + geom_point() + stat_density_2d(aes(fill = ..level..), geom = "polygon")
## Warning: Removed 2 rows containing non-finite values (stat_density2d).
## Warning: Removed 2 rows containing missing values (geom_point).

CAU

cau <- fread(my.dir %&% "fst_table_CAU.txt")
mean_cau <- cau[,list(mean_fstCAUafa=mean(fstCAUafa, na.rm = TRUE),mean_fstHIScau=mean(fstHIScau, na.rm = TRUE),
  R2_AFA=mean(R2_AFA, na.rm = TRUE), R2_CAU=mean(R2_CAU, na.rm = TRUE), 
  R2_HIS=mean(R2_HIS, na.rm = TRUE), 
  betaCAU_fstCAUafa=sum(fstCAUafa * abs(betaCAU), na.rm = TRUE)/table(is.na(fstCAUafa * betaCAU))[[1]],
  betaCAU_fstHIScau=sum(fstHIScau * abs(betaCAU), na.rm = TRUE)/table(is.na(fstHIScau * betaCAU))[[1]]),
  by=GENE]

mean_cau_0.2 <- dplyr::filter(mean_cau, R2_AFA > 0.2 | R2_CAU > 0.2)
ggplot(mean_cau_0.2, aes(x=R2_CAU-R2_AFA,y=betaCAU_fstCAUafa)) + geom_point() + stat_density_2d(aes(fill = ..level..), geom = "polygon")

ggplot(mean_cau_0.2, aes(x=R2_CAU-R2_AFA,y=mean_fstCAUafa)) + geom_point() + stat_density_2d(aes(fill = ..level..), geom = "polygon")
## Warning: Removed 6 rows containing non-finite values (stat_density2d).
## Warning: Removed 6 rows containing missing values (geom_point).

mean_cau_0.2 <- dplyr::filter(mean_cau, R2_CAU > 0.2 | R2_HIS > 0.2)
ggplot(mean_cau_0.2, aes(x=R2_CAU-R2_HIS,y=betaCAU_fstHIScau)) + geom_point() + stat_density_2d(aes(fill = ..level..), geom = "polygon")

ggplot(mean_cau_0.2, aes(x=R2_CAU-R2_HIS,y=mean_fstHIScau)) + geom_point() + stat_density_2d(aes(fill = ..level..), geom = "polygon")
## Warning: Removed 1 rows containing non-finite values (stat_density2d).
## Warning: Removed 1 rows containing missing values (geom_point).

HIS

his <- fread(my.dir %&% "fst_table_HIS.txt")
mean_his <- his[,list(mean_fstAFAhis=mean(fstAFAhis, na.rm = TRUE),mean_fstHIScau=mean(fstHIScau, na.rm = TRUE),
  R2_AFA=mean(R2_AFA, na.rm = TRUE), R2_CAU=mean(R2_CAU, na.rm = TRUE), 
  R2_HIS=mean(R2_HIS, na.rm = TRUE), 
  betaHIS_fstAFAhis=sum(fstAFAhis * abs(betaHIS), na.rm = TRUE)/table(is.na(fstAFAhis * betaHIS))[[1]],
  betaHIS_fstHIScau=sum(fstHIScau * abs(betaHIS), na.rm = TRUE)/table(is.na(fstHIScau * betaHIS))[[1]]),
  by=GENE]

mean_his_0.2 <- dplyr::filter(mean_his, R2_AFA > 0.2 | R2_HIS > 0.2)
ggplot(mean_his_0.2, aes(x=R2_HIS-R2_AFA,y=betaHIS_fstAFAhis)) + geom_point() + stat_density_2d(aes(fill = ..level..), geom = "polygon")

ggplot(mean_his_0.2, aes(x=R2_HIS-R2_AFA,y=mean_fstAFAhis)) + geom_point() + stat_density_2d(aes(fill = ..level..), geom = "polygon")
## Warning: Removed 2 rows containing non-finite values (stat_density2d).
## Warning: Removed 2 rows containing missing values (geom_point).

mean_his_0.2 <- dplyr::filter(mean_his, R2_CAU > 0.2 | R2_HIS > 0.2)
ggplot(mean_his_0.2, aes(x=R2_HIS-R2_CAU,y=betaHIS_fstHIScau)) + geom_point() + stat_density_2d(aes(fill = ..level..), geom = "polygon")

ggplot(mean_his_0.2, aes(x=R2_HIS-R2_CAU,y=mean_fstHIScau)) + geom_point() + stat_density_2d(aes(fill = ..level..), geom = "polygon")
## Warning: Removed 1 rows containing non-finite values (stat_density2d).
## Warning: Removed 1 rows containing missing values (geom_point).

facet_wrap plots

afa_cau <- dplyr::select(mean_afa_0.2, GENE, pop1=R2_AFA, pop2=R2_CAU, mean_Fst=mean_fstCAUafa, beta_Fst=betaAFA_fstCAUafa) %>% 
  mutate(pop="AFA-CAU",diffR2 = pop1 - pop2) %>% select(pop, GENE, pop1, pop2, diffR2, mean_Fst, beta_Fst)

afa_his <- dplyr::select(mean_afa_0.2, GENE, pop1=R2_AFA, pop2=R2_HIS, mean_Fst=mean_fstAFAhis, beta_Fst=betaAFA_fstAFAhis) %>% 
  mutate(pop="AFA-HIS",diffR2 = pop1 - pop2) %>% select(pop, GENE, pop1, pop2, diffR2, mean_Fst, beta_Fst)

cau_afa <- dplyr::select(mean_cau_0.2, GENE, pop2=R2_AFA, pop1=R2_CAU, mean_Fst=mean_fstCAUafa, beta_Fst=betaCAU_fstCAUafa) %>% 
  mutate(pop="CAU-AFA",diffR2 = pop1 - pop2) %>% select(pop, GENE, pop1, pop2, diffR2, mean_Fst, beta_Fst)

cau_his <- dplyr::select(mean_cau_0.2, GENE, pop1=R2_CAU, pop2=R2_HIS, mean_Fst=mean_fstHIScau, beta_Fst=betaCAU_fstHIScau) %>% 
  mutate(pop="CAU-HIS",diffR2 = pop1 - pop2) %>% select(pop, GENE, pop1, pop2, diffR2, mean_Fst, beta_Fst)

his_afa <- dplyr::select(mean_his_0.2, GENE, pop2=R2_AFA, pop1=R2_HIS, mean_Fst=mean_fstAFAhis, beta_Fst=betaHIS_fstAFAhis) %>% 
  mutate(pop="HIS-AFA",diffR2 = pop1 - pop2) %>% select(pop, GENE, pop1, pop2, diffR2, mean_Fst, beta_Fst)

his_cau <- dplyr::select(mean_his_0.2, GENE, pop2=R2_CAU, pop1=R2_HIS, mean_Fst=mean_fstHIScau, beta_Fst=betaHIS_fstHIScau) %>% 
  mutate(pop="HIS-CAU",diffR2 = pop1 - pop2) %>% select(pop, GENE, pop1, pop2, diffR2, mean_Fst, beta_Fst)

all <- rbind(afa_cau,afa_his,cau_afa,cau_his,his_afa,his_cau)

all <- mutate(all,pop=factor(pop,levels=c("AFA-CAU","CAU-AFA","AFA-HIS","HIS-AFA","HIS-CAU","CAU-HIS")))


ggplot(all, aes(x=diffR2,y=beta_Fst)) + facet_wrap(~pop,nrow=3) + 
  geom_point(col="gray") + geom_density_2d() + 
  labs(x=expression(paste(R^2, " difference (pop1 - pop2)")),y=expression(paste("weighted mean ", F[ST]))) + 
  theme_bw(14)
## Warning: Removed 65 rows containing non-finite values (stat_density2d).
## Warning: Removed 65 rows containing missing values (geom_point).

ggplot(all, aes(x=diffR2,y=beta_Fst)) + facet_wrap(~pop,nrow=3) + 
  geom_point(col="gray") + geom_density_2d() + 
  labs(x=expression(paste(R^2, " difference (pop1 - pop2)")),y=expression(paste("weighted mean ", F[ST]))) + 
  theme_bw(14) + coord_cartesian(ylim=c(0,0.02))
## Warning: Removed 65 rows containing non-finite values (stat_density2d).

## Warning: Removed 65 rows containing missing values (geom_point).

ggplot(all, aes(x=diffR2,y=mean_Fst)) + facet_wrap(~pop,nrow=3) + geom_point(col='gray') + geom_density_2d() + labs(x=expression(paste(R^2, " difference (pop1 - pop2)")),y=expression(paste("mean ", F[ST]))) + theme_bw(14)
## Warning: Removed 106 rows containing non-finite values (stat_density2d).
## Warning: Removed 106 rows containing missing values (geom_point).

suball <- dplyr::filter(all, pop=='AFA-CAU' | pop=='AFA-HIS' | pop=='CAU-HIS')

c <- ggplot(suball, aes(x=diffR2,y=mean_Fst)) + facet_wrap(~pop,nrow=1) + geom_point(col='gray') + geom_density_2d() + labs(x=expression(paste(R^2, " difference (pop1 - pop2)")),y=expression(paste("mean ", F[ST])),title='C') + theme_bw(14) + coord_cartesian(xlim=c(-0.85,0.85)) + theme(plot.margin=unit(c(0,0.2,0,0.2), "cm"))
print(c)
## Warning: Removed 53 rows containing non-finite values (stat_density2d).
## Warning: Removed 53 rows containing missing values (geom_point).

no filter

afa_cau <- dplyr::select(mean_afa, GENE, pop1=R2_AFA, pop2=R2_CAU, mean_Fst=mean_fstCAUafa, beta_Fst=betaAFA_fstCAUafa) %>% 
  mutate(pop="AFA-CAU",diffR2 = pop1 - pop2) %>% select(pop, GENE, pop1, pop2, diffR2, mean_Fst, beta_Fst)

afa_his <- dplyr::select(mean_afa, GENE, pop1=R2_AFA, pop2=R2_HIS, mean_Fst=mean_fstAFAhis, beta_Fst=betaAFA_fstAFAhis) %>% 
  mutate(pop="AFA-HIS",diffR2 = pop1 - pop2) %>% select(pop, GENE, pop1, pop2, diffR2, mean_Fst, beta_Fst)

cau_afa <- dplyr::select(mean_cau, GENE, pop2=R2_AFA, pop1=R2_CAU, mean_Fst=mean_fstCAUafa, beta_Fst=betaCAU_fstCAUafa) %>% 
  mutate(pop="CAU-AFA",diffR2 = pop1 - pop2) %>% select(pop, GENE, pop1, pop2, diffR2, mean_Fst, beta_Fst)

cau_his <- dplyr::select(mean_cau, GENE, pop1=R2_CAU, pop2=R2_HIS, mean_Fst=mean_fstHIScau, beta_Fst=betaCAU_fstHIScau) %>% 
  mutate(pop="CAU-HIS",diffR2 = pop1 - pop2) %>% select(pop, GENE, pop1, pop2, diffR2, mean_Fst, beta_Fst)

his_afa <- dplyr::select(mean_his, GENE, pop2=R2_AFA, pop1=R2_HIS, mean_Fst=mean_fstAFAhis, beta_Fst=betaHIS_fstAFAhis) %>% 
  mutate(pop="HIS-AFA",diffR2 = pop1 - pop2) %>% select(pop, GENE, pop1, pop2, diffR2, mean_Fst, beta_Fst)

his_cau <- dplyr::select(mean_his, GENE, pop2=R2_CAU, pop1=R2_HIS, mean_Fst=mean_fstHIScau, beta_Fst=betaHIS_fstHIScau) %>% 
  mutate(pop="HIS-CAU",diffR2 = pop1 - pop2) %>% select(pop, GENE, pop1, pop2, diffR2, mean_Fst, beta_Fst)

all_nof <- rbind(afa_cau,afa_his,cau_afa,cau_his,his_afa,his_cau)
ggplot(all_nof, aes(x=diffR2,y=pop1,col=pop2)) + facet_wrap(~pop,nrow=3) + geom_point() +  labs(x=expression(paste(R^2, " difference (pop1 - pop2)")),y=expression(paste("pop1 ", R^2)),col=expression(paste("pop2 ", R^2))) + theme_bw(14) + 
  geom_density_2d()
## Warning: Removed 3572 rows containing non-finite values (stat_density2d).
## Warning: Removed 3572 rows containing missing values (geom_point).

suball_nof <- dplyr::filter(all_nof, pop=='AFA-CAU' | pop=='AFA-HIS' | pop=='CAU-HIS')
b <- ggplot(suball_nof, aes(x=diffR2,y=pop1,col=pop2)) + facet_wrap(~pop,nrow=1) + geom_point() +  labs(x=expression(paste(R^2, " difference (pop1 - pop2)")),y=expression(paste("pop1 ", R^2)),col=expression(paste("pop2 ", R^2)),title='B') + theme_bw(14) + 
  scale_color_gradientn(colours = c('blue','pink')) + 
  theme(legend.justification=c(0,1), legend.position=c(0.005,0.995),legend.title = element_text(size=10),legend.text = element_text(size=10),legend.key.size = unit(0.3, "cm"),plot.margin=unit(c(0,0.2,0,0.2), "cm")) + 
  coord_cartesian(xlim=c(-0.85,0.85)) + geom_density_2d(col='gray') + scale_size(range=c(5,20))
print(b)
## Warning: Removed 1786 rows containing non-finite values (stat_density2d).
## Warning: Removed 1786 rows containing missing values (geom_point).

a <- ggplot(suball_nof, aes(x=pop1,y=pop2)) + geom_point() + geom_smooth(method="lm") + facet_wrap(~pop,nrow=1) + theme_bw(14)+
  labs(x=expression(paste("pop1 ", R^2)),y=expression(paste("pop2 ", R^2)),title='A') + geom_density_2d(col="gray") +
  theme(plot.margin=unit(c(0.05,0.2,0,0.2), "cm"))

print(a)
## Warning: Removed 1786 rows containing non-finite values (stat_smooth).
## Warning: Removed 1786 rows containing non-finite values (stat_density2d).
## Warning: Removed 1786 rows containing missing values (geom_point).

grid.arrange(a, b, c, nrow=3)
## Warning: Removed 1786 rows containing non-finite values (stat_smooth).
## Warning: Removed 1786 rows containing non-finite values (stat_density2d).
## Warning: Removed 1786 rows containing missing values (geom_point).
## Warning: Removed 1786 rows containing non-finite values (stat_density2d).
## Warning: Removed 1786 rows containing missing values (geom_point).
## Warning: Removed 53 rows containing non-finite values (stat_density2d).
## Warning: Removed 53 rows containing missing values (geom_point).

ggplot(all,aes(x=pop1,y=mean_Fst,col=pop2)) + geom_point() + geom_density_2d(col='gray') + facet_wrap(~pop,nrow=3) + 
  scale_color_gradientn(colours = c('blue','pink')) + theme_bw(14) + 
  labs(x=expression(paste("pop1 ", R^2)),col=expression(paste("pop2 ", R^2)))
## Warning: Removed 106 rows containing non-finite values (stat_density2d).
## Warning: Removed 106 rows containing missing values (geom_point).

ggplot(all,aes(x=pop1,y=beta_Fst,col=pop2)) + geom_point() + geom_density_2d(col='gray') + facet_wrap(~pop,nrow=3) + 
   scale_color_gradientn(colours = c('blue','pink')) + theme_bw(14) + 
  labs(x=expression(paste("pop1 ", R^2)),col=expression(paste("pop2 ", R^2)))

ggplot(all,aes(x=pop1,y=beta_Fst,col=pop2)) + geom_point() + geom_density_2d(col='gray') + facet_wrap(~pop,nrow=3) + 
   scale_color_gradientn(colours = c('blue','pink')) + theme_bw(14) + coord_cartesian(ylim=c(0,0.02)) + 
  labs(x=expression(paste("pop1 ", R^2)),col=expression(paste("pop2 ", R^2)))

ggplot(all,aes(x=mean_Fst,col=pop)) + geom_density() + coord_cartesian(xlim=c(0,0.2))
## Warning: Removed 106 rows containing non-finite values (stat_density).

ggplot(all,aes(x=beta_Fst,col=pop)) + geom_density() + coord_cartesian(xlim=c(0,0.01))